#manual branding - file won't load
transcend_cols = c("#1A4C81","#59C3B4","#EF464B","#ADE0EE")
transcend_cols2 = c("#BC2582","#FFA630","#FFDE42","#99C24D","#218380","#D3B7D7")
transcend_grays = c("#4D4D4F","#9D9FA2","#D1D3D4")
transcend_na = transcend_grays[2]
theme_transcend = theme_gdocs(base_size = 14, base_family = "Open Sans") +
  theme(
    plot.title = element_text(family = "Bebas Neue", color = "black"),
    plot.background = element_blank(),
    axis.text = element_text(colour = "black"),
    axis.title = element_text(colour = "black"),
    panel.border = element_rect(colour = "#4D4D4F"),
    strip.text = element_text(size = rel(0.8)),
    plot.margin = margin(10, 24, 10, 10, "pt")
  )
theme_set(theme_transcend)

Future resources

Level of concern

Guiding question: How concerned are school leaders about sustainability over the next 5 years?

The majority (61%) of Canopy school leaders reported feeling concerned about sustainability over the next 5 years. By contrast, only around 20% of school leaders indicated they were not concerned about sustainability.

variables %>% 
  select(school_id, future_resources) %>% 
  mutate(rate = 1) %>% 
  group_by(future_resources) %>% 
  summarize(n = sum(rate),
            pct = n/189) %>% 
  filter(!is.na(future_resources)) %>% 
  mutate(group = case_when(
    future_resources == "Yes, extremely" | future_resources == "Yes, somewhat" ~ "Concerned",
    future_resources == "No, not at all" | future_resources == "No, not very" ~ "Not concerned",
    TRUE ~ "Neutral"
  ),
  future_resources = factor(future_resources,
                            levels = c("Yes, extremely", "Yes, somewhat", "Neutral", "No, not very", "No, not at all"))) %>% 
  ggplot(., aes(future_resources, pct, fill = group)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_grays[1], transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  theme(legend.position = "none", panel.grid.major.x = element_blank()) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with future sustainability") +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), nudge_y = 0.01, vjust = 0, color = transcend_na, fontface = "bold", size = 5.5, family = "sans") +
  scale_x_discrete(labels = wrap_format(10))

Guiding question: Are certain types of schools more/less likely to be concerned about resources?

Suburban schools were least concerned about future sustainability while schools serving rural or multiple geographic regions held the highest levels of concern.

variables %>% 
  select(school_id, future_resources, school_locale) %>% 
  mutate(concern = case_when(
    future_resources == "Yes, extremely" | future_resources == "Yes, somewhat" ~ "Concerned",
    future_resources == "No, not at all" | future_resources == "No, not very" ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>% 
  mutate(rate = 1) %>% 
  group_by(concern, school_locale) %>% 
  summarize(n = sum(rate)) %>% 
  ungroup() %>% 
  group_by(school_locale) %>% 
  mutate(sum = sum(n),
         pct = n/sum) %>% 
  ungroup() %>% 
  filter(!is.na(school_locale)) %>% 
  mutate(school_locale = factor(school_locale, levels = c("Multiple", "Rural", "Urban", "Suburban"))) %>%
  ggplot(., aes(school_locale, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), 
            position = position_stack(vjust = .5), #vjust = .95 before
            #hjust = 1,
            color = "white", 
            size = 4, 
            family = "sans") +
  theme(legend.position = "bottom", legend.direction = "horizontal", panel.grid.major.y = element_blank(), axis.text.y = element_text(size = 11)) +
  labs(x = "", y = "Percent of schools", title = str_wrap("School leader concern with future sustainability", 60), 
       fill = "Level of concern") +
  guides(fill = guide_legend(reverse = T)) +
  coord_flip()

Independent schools had the least concern about future sustainability, while public charter schools had the most concern.

variables %>% 
  select(school_id, future_resources, school_type) %>% 
  mutate(concern = case_when(
    future_resources == "Yes, extremely" | future_resources == "Yes, somewhat" ~ "Concerned",
    future_resources == "No, not at all" | future_resources == "No, not very" ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>% 
  mutate(rate = 1) %>% 
  group_by(concern, school_type) %>% 
  summarize(n = sum(rate)) %>% 
  ungroup() %>% 
  group_by(school_type) %>% 
  mutate(sum = sum(n),
         pct = n/sum) %>% 
  ungroup() %>% 
  filter(!is.na(school_type)) %>% 
  mutate(school_type = factor(school_type, levels = c("Public charter school", "Public district school", "Independent (private) school"))) %>% 
  ggplot(., aes(school_type, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), 
            position = position_stack(vjust = .5),
            #hjust = 1,
            color = "white", 
            size = 5, 
            family = "sans") +
  theme(legend.position = "bottom", legend.direction = "horizontal", panel.grid.major.y = element_blank(), axis.text.y = element_text(size = 11)) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  scale_x_discrete(labels = wrap_format(20)) +
  guides(fill = guide_legend(reverse = T)) +
  coord_flip()

Schools leaders across grade bands reported similar levels of concern with future sustainability.

variables %>% 
  select(school_id, future_resources, grades_pk, grades_elementary, grades_middle, grades_high) %>% 
  mutate(concern = case_when(
    future_resources == "Yes, extremely" | future_resources == "Yes, somewhat" ~ "Concerned",
    future_resources == "No, not at all" | future_resources == "No, not very" ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>% 
  group_by(concern) %>% 
  summarize(PK = sum(grades_pk),
            Elementary = sum(grades_elementary),
            Middle = sum(grades_middle),
            High = sum(grades_high)) %>% 
  ungroup() %>% 
  pivot_longer(cols = c(PK, Elementary, Middle, High),
               names_to = "school_level",
               values_to = "n") %>% 
  group_by(school_level) %>% 
  mutate(sum = sum(n),
         pct = n/sum) %>% 
  ungroup() %>% 
  mutate(school_level = factor(school_level, levels = c("Elementary", "High", "Middle", "PK"))) %>% 
  ggplot(., aes(school_level, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), 
            position = position_stack(vjust = .5),
            #hjust = 1,
            color = "white", 
            size = 5, 
            family = "sans") +
  theme(legend.position = "bottom", legend.direction = "horizontal", panel.grid.major.y = element_blank(), axis.text.y = element_text(size = 11)) +
  guides(fill = guide_legend(reverse = T)) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  coord_flip()

Schools reporting additional school descriptors had higher levels of concern about future sustainability, particularly hybrid schools, virtual schools, and homeschooling cooperatives. However, the number of schools that fall into each of these categories is much less stable than for other categories. For instance, homeschooling cooperatives only describe 3 schools in our sample, thus, the proportion of concern is accurate, but we should be careful about the amount of emphasis we place on this finding as it only represents a tiny fraction of our sample.

variables %>% 
  select(school_id, future_resources, starts_with("school_desc"), -school_desc_other_text) %>% 
  mutate(concern = case_when(
    future_resources == "Yes, extremely" | future_resources == "Yes, somewhat" ~ "Concerned",
    future_resources == "No, not at all" | future_resources == "No, not very" ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>% 
  group_by(concern) %>% 
  summarize(`Homeschooling cooperative` = sum(school_desc_homeschool),
            `Hybrid school` = sum(school_desc_hybrid),
            Microschool = sum(school_desc_micro),
            `Part time school` = sum(school_desc_part_time),
            `School within school` = sum(school_desc_sws),
            `Virtual school` = sum(school_desc_virtual),
            Other = sum(school_desc_other)) %>% 
  ungroup() %>% 
  pivot_longer(cols = !concern,
               names_to = "school_desc",
               values_to = "n") %>% 
  group_by(school_desc) %>% 
  mutate(sum = sum(n),
         pct = n/sum) %>% 
  ungroup() %>% 
  mutate(school_desc = factor(school_desc, levels = c("Virtual school", "Hybrid school", "Homeschooling cooperative", "Other", "Part time school", "School within school", "Microschool"))) %>% 
  ggplot(., aes(school_desc, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  theme(legend.position = "bottom", legend.direction = "horizontal", panel.grid.major.y = element_blank(), axis.text.y = element_text(size = 11)) +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), 
            position = position_stack(vjust = .5),
            #hjust = 1,
            color = "white", 
            size = 5, 
            family = "sans") +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  scale_x_discrete(labels = wrap_format(20)) +
  guides(fill = guide_legend(reverse = T)) +
  coord_flip()

In the figure below, I ran a logistic regression model to estimate the likelihood for a school leader to indicate they were concerned with future sustainability, using school characteristics as additional covariates. Note that I transformed the future_resources variable into a binary, with 1 indicating concern (“Yes, extremely”; “Yes, somewhat). I’m open to feedback about alternate modeling choices–I’m less familiar with multivariate approaches and was not entirely sure how to set it up or if it was a preferable approach.

School that are predominantly BIPOC-led were far more likely to indicate they were concerned about future sustainability, more than 20x more likely than schools with 0-25% leaders of color. Elementary schools, charter schools, and rural schools or schools serving students in more than one geographic locale were all also more likely to indicate they were concerned about future sustainability.

#prep data
mod_dat <- variables %>% 
  select(school_id, future_resources, school_locale, school_type, grades_pk, grades_elementary, grades_middle, grades_high, school_enrollment, pct_bipoc, pct_ell, pct_frpl, pct_swd, teaching_diversity, leadership_diversity) %>% 
  mutate(future_resources = case_when(
    future_resources == "Yes, extremely" ~ 1,
    future_resources == "Yes, somewhat" ~ 1,
    TRUE ~ 0
  ),
  teaching_diversity = gsub("people", "teachers", teaching_diversity),
  leadership_diversity = gsub("people", "leaders", leadership_diversity),
  school_locale = factor(school_locale, levels = c("Urban", "Suburban", "Rural", "Multiple")),
  school_type = factor(school_type, levels = c("Public district school", "Public charter school", "Independent (private) school")),
  teaching_diversity = factor(teaching_diversity, levels = c("0 - 24% teachers of color", "25 - 49% teachers of color", "50 - 74% teachers of color", "75 - 100% teachers of color")),
  leadership_diversity = factor(leadership_diversity, levels = c("0 - 24% leaders of color", "25 - 49% leaders of color", "50 - 74% leaders of color", "75 - 100% leaders of color")),
  school_enrollment = as.numeric(scale(school_enrollment, center = TRUE, scale = TRUE))) %>% 
  mutate(across(starts_with("pct"), ~as.numeric(scale(., center = TRUE, scale = TRUE))))
#model
mod1 <- glm(future_resources ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + teaching_diversity + leadership_diversity,
               family = "binomial",
               data = mod_dat)
# set labels
cov_labels <- c(
  "school_typeIndependent (private) school" = "Independent (private) school",
  "school_typePublic charter school" = "Public charter school",
  "grades_pk" = "PreK",
  "grades_elementary" = "Elementary",
  "grades_middle" = "Middle",
  "grades_high" = "High",
  "school_enrollment" = "School Enrollment",
  "pct_bipoc" = "% BIPOC students",
  "pct_ell" = "% EL-designated students",
  "pct_frpl" = "% FRPL-eligible",
  "pct_swd" = "% Students with disabilities",
  "school_localeMultiple" = "Multiple locales",
  "school_localeSuburban" = "Suburban",
  "school_localeRural" = "Rural",
  "leadership_diversity25 - 49% leaders of color" = "25-49% leaders of color",
  "leadership_diversity50 - 74% leaders of color" = "50-74% leaders of color",
  "leadership_diversity75 - 100% leaders of color" = "75-100% leaders of color",
  "teaching_diversity25 - 49% teachers of color" = "25-49% teachers of color",
  "teaching_diversity50 - 74% teachers of color" = "50-74% teachers of color",
  "teaching_diversity75 - 100% teachers of color" = "75-100% teachers of color"
)
# plot
tidy(mod1, effects = "ran_pars", conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  #filter(p.value >= .5) %>% 
  mutate(exp_est = exp(estimate), 
         exp_min = exp(estimate - std.error), 
         exp_max = exp(estimate + std.error)) %>% 
  mutate(term = cov_labels[term]) %>% 
ggplot(., aes(y = fct_reorder(term, exp_est), x = exp_est)) +
  geom_linerange(aes(xmin = exp_min,
                     xmax = exp_max),
                 color = "blue") +
  geom_point() +
  geom_vline(xintercept = 1) +
  scale_x_continuous(
    trans = "log",
    breaks = c(.0625, .2, .5, 1, 2, 5, 16),
    labels = str_wrap(c("1/16 as likely", "1/5 as likely", "1/2 as likely", "Even", "2x as likely", "5x as likely", "16x as likely"), 10),
    expand = expansion(0, 0)
  ) +
  theme_transcend +
  theme(panel.grid.major.y = element_blank()) +
  labs(
    x = "Likelihood",
    y = "",
    title = str_wrap("School characteristics predicting higher concern with future sustainability", 60))

Cause for concern

Guiding question: For leaders who were concerned about future sustainability, what reasons did they cite for their concern?

School leaders expressed the most concern over funding, including availability of local funds and private funding from foundations or donors. They were also concerned about staffing shortages and inflation/increasing prices, to a slightly smaller extent.

#nice labels
barrier_labs <- variables %>% 
  select(starts_with("barrier"), -barrier_other_text) %>% 
  pivot_longer(cols = starts_with("barrier"),
               names_to = "barrier",
               values_to = "response") %>% 
  select(barrier) %>% 
  unique() %>% 
  mutate(label = case_when(
    barrier == "barrier_local_funds" ~ "Availability of local funds",
    barrier == "barrier_private_funds" ~ "Availability of private funding from foundations or donors",
    barrier == "barrier_donations" ~ "Changes to in-kind donations",
    barrier == "barrier_inflation" ~ "Inflation/increasing prices",
    barrier == "barrier_enrollment" ~ "Changes in school enrollment",
    barrier == "barrier_shortage" ~ "Staffing shortages",
    barrier == "barrier_federal_funds" ~ "Expiration of federal relief funds",
    barrier == "barrier_other" ~ "Some other reason"
  ))
#plot
variables %>% 
  select(school_id, future_resources, starts_with("barrier"), -barrier_other_text) %>% 
  filter(future_resources == "Yes, extremely" | future_resources == "Yes, somewhat") %>% 
  pivot_longer(cols = starts_with("barrier"),
               names_to = "barrier",
               values_to = "response") %>% 
  group_by(barrier) %>% 
  summarize(n = sum(response)) %>% 
  left_join(barrier_labs, by = "barrier") %>% 
  ggplot(., aes(reorder(label, n,), n)) +
  geom_col(fill = transcend_cols[1]) +
  theme(legend.position = "none", 
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(size = 11)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 95)) +
  scale_x_discrete(labels = wrap_format(35)) +
  geom_text(aes(label = n), nudge_y = 0.5, hjust = 0, color = transcend_na, fontface = "bold", size = 5.5, family = "sans") +
  coord_flip() +
  labs(x = "",
       y = "",
       title = "Reasons for concern over future sustainability")

The following graph provides the same information, with the percentage of schools that selected each reason from those who indicated they were concerned (N = 115) rather than a raw number.

variables %>% 
  select(school_id, future_resources, starts_with("barrier"), -barrier_other_text) %>% 
  filter(future_resources == "Yes, extremely" | future_resources == "Yes, somewhat") %>% 
  pivot_longer(cols = starts_with("barrier"),
               names_to = "barrier",
               values_to = "response") %>% 
  group_by(barrier) %>% 
  summarize(n = sum(response),
            pct = n/115) %>% 
  left_join(barrier_labs, by = "barrier") %>% 
  ggplot(., aes(reorder(label, pct), pct)) +
  geom_col(fill = transcend_cols[1]) +
  theme(legend.position = "none", 
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(size = 11)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  scale_x_discrete(labels = wrap_format(35)) +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), hjust = 0, color = transcend_na, fontface = "bold", size = 5.5, family = "sans") +
  coord_flip() +
  labs(x = "",
       y = "",
       title = "Reasons for concern over future sustainability")

Of leaders who expressed concern over future sustainability, 15 chose a reason that was not listed and provided a written response describing an additional factor that may hinder their ability to sustain adequate resources for their school in the next five years. I created a word cloud (thanks for the inspriration and code, Anwesha!) but it wasn’t especially interesting.

# Pull open response answers
responses <- variables %>% 
  select(future_resources, barrier_other_text) %>% 
  filter(future_resources == "Yes, extremely" | future_resources == "Yes, somewhat") %>% 
  filter(!is.na(barrier_other_text))

# Add more stop words
custom_stopwords <- c("yes", "somewhat", "extremely", "school", "schools", "funding", "also", "will", "add", "state", "education", "community")

# Create a text corpus
corpus <- Corpus(VectorSource(responses))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, c(stopwords("english"), custom_stopwords))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, "Dark2"))

Here’s the original responses:

datatable(responses)

Guiding question: Did the reason for concern vary by school characteristics?

Generally, yes. How school characteristics differed by rationale provided for concern varies by the specific factor of interest, but some high-level takeaways seem to be that leadership diversity and geographic region are playing a large role in the variance, and to a lesser extent, school level and school governance. Predominantly BIPOC-led schools with at least 75% of leaders identifying as a person of color were more likely to select every factor as something that may hinder their ability to sustain adequate resources for their school in the next 5 years.

Note that for this set of models I used a logistic regression model to calculate the likelihood of schools with certain characteristics to select a given factor. School leaders were able to select as many factors as they’d like, so I treated them as independent answers/outcomes using dummy codes.

# model function
log_model <- function(outcome){ #outcome needs to be dummy
  #prep data
  data <-  variables %>% 
    select({{outcome}}, school_id, school_locale, school_type, grades_pk, grades_elementary, grades_middle, grades_high, school_enrollment, pct_bipoc, pct_ell, pct_frpl, pct_swd, teaching_diversity, leadership_diversity) %>% 
    mutate(teaching_diversity = gsub("people", "teachers", teaching_diversity),
           leadership_diversity = gsub("people", "leaders", leadership_diversity),
           school_locale = factor(school_locale, levels = c("Urban", "Suburban", "Rural", "Multiple")),
           school_type = factor(school_type, levels = c("Public district school", "Public charter school", "Independent (private) school")),
           teaching_diversity = factor(teaching_diversity, levels = c("0 - 24% teachers of color", "25 - 49% teachers of color", "50 - 74% teachers of color", "75 - 100% teachers of color")),
           leadership_diversity = factor(leadership_diversity, levels = c("0 - 24% leaders of color", "25 - 49% leaders of color", "50 - 74% leaders of color", "75 - 100% leaders of color")),
           school_enrollment = as.numeric(scale(school_enrollment, center = TRUE, scale = TRUE))) %>% 
    mutate(across(starts_with("pct"), ~as.numeric(scale(., center = TRUE, scale = TRUE))))
  #model
  mod <- glm({{outcome}} ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + teaching_diversity + leadership_diversity,
               family = "binomial",
               data = data)
  return(mod)
}
  #plot
plot_model <- function(title, data){
  plot <- tidy(data, effects = "ran_pars", conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  mutate(exp_est = exp(estimate), 
         exp_min = exp(estimate - std.error), 
         exp_max = exp(estimate + std.error)) %>% 
  mutate(term = cov_labels[term]) %>% 
  ggplot(., aes(y = fct_reorder(term, exp_est), x = exp_est)) +
  geom_linerange(aes(xmin = exp_min,
                     xmax = exp_max),
                 color = "blue") +
  geom_point() +
  geom_vline(xintercept = 1) +
  scale_x_continuous(
  trans = "log",
  breaks = c(.0625, .25, .5, 1, 2, 4, 16),
  labels = str_wrap(c("1/16 as likely", "1/4 as likely", "1/2 as likely", "Even", "2x as likely", "4x as likely", "16x as likely"), 10),
  expand = expansion(0, 0)
  ) +
  theme_transcend +
  theme(panel.grid.major.y = element_blank()) +
  labs(
    x = "",
    y = "",
    title = str_wrap(title, 60))
  return(plot)}
tidy_model <- function(data){
  data <- tidy(data, effects = "ran_pars", conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  mutate(exp_est = exp(estimate), 
         exp_min = exp(estimate - std.error), 
         exp_max = exp(estimate + std.error)) %>% 
  mutate(term = cov_labels[term])
  return(data)
}
local_funds <- log_model(variables$barrier_local_funds)
plot_model("School characteristics predicting higher concern with availability of local funds", local_funds)

local_funds <- tidy_model(local_funds) %>% 
  mutate(factor = "Availability of local funds")
private_funds <- log_model(variables$barrier_private_funds)
plot_model("School characteristics predicting higher concern with availability of private funds", private_funds)

private_funds <- tidy_model(private_funds) %>% 
  mutate(factor = "Availability of private funds")
donations <- log_model(variables$barrier_donations)
plot_model("School characteristics predicting higher concern with changes to in-kind donations", donations)

donations <- tidy_model(donations) %>% 
  mutate(factor = "Changes to in-kind donations")
inflation <- log_model(variables$barrier_inflation)
plot_model("School characteristics predicting higher concern with inflation/increasing prices", inflation)

inflation <- tidy_model(inflation) %>% 
  mutate(factor = "Inflation or increasing prices")
enrollment <- log_model(variables$barrier_enrollment)
plot_model("School characteristics predicting higher concern with changes in school enrollment", enrollment)

enrollment <- tidy_model(enrollment) %>% 
  mutate(factor = "Changes in school enrollment")
staffing <- log_model(variables$barrier_shortage)
plot_model("School characteristics predicting higher concern with staffing shortages", staffing)

staffing <- tidy_model(staffing) %>% 
  mutate(factor = "Staffing shortages")
fiscal_cliff <- log_model(variables$barrier_federal_funds)
plot_model("School characteristics predicting higher concern with expiration of federal relief funds", fiscal_cliff)

fiscal_cliff <- tidy_model(fiscal_cliff) %>% 
  mutate(factor = "Loss of federal relief funds")

Combined heatmap version:

#save plot
  # png(filename = "output/janette-images/figure_12.png",
  #   width = 9,
  #   height = 7,
  #   units = "in",
  #   res = 300)
#merge tidy data
transformative_factors <- bind_rows(local_funds, private_funds, donations, inflation, enrollment, staffing, fiscal_cliff) %>% 
  filter(term == "Suburban"| term == "Rural"| term == "Multiple locales"| term == "25-49% leaders of color"| term == "50-74% leaders of color"| term == "75-100% leaders of color") %>% 
  mutate(log_exp_est = log(exp_est),
         fill_color = ifelse(p.value >= 0.5, transcend_na,
                        ifelse(log_exp_est < 0, scales::gradient_n_pal(c(transcend_cols[3], "white"))((log_exp_est - min(log_exp_est)) / abs(min(log_exp_est))),
                               scales::gradient_n_pal(c("white", transcend_cols[1]))((log_exp_est - 0) / (max(log_exp_est) - 0)))))
#rm(local_funds, private_funds, donations, inflation, enrollment, staffing, fiscal_cliff)
#create heatmap
ggplot(transformative_factors, aes(factor, term, fill = fill_color)) + #fill = exp_est
  geom_tile() +
  #scale_fill_gradient(low = transcend_cols[3], high = transcend_cols[1]) +
  scale_fill_identity() +
  scale_x_discrete(expand = c(0,0), labels = wrap_format(13)) +
  labs(title = str_wrap("School characteristics predicting future factors of concern", 55),
       subtitle = str_wrap("Darker blue indicates that characteristic made a school more likely to select the factor as something that will be transformative in the next 5 years, and a lighter blue made a school less likely to select that factor.", 65),
       x = "",
       y = "") +
  theme(legend.position = "none",
        axis.text.x = element_text(size = 12))

# dev.off()

Future transformations

Guiding question: Which factors do school leaders think will be most transformative for schools in the next 5 years?

The top 3 factors school leaders cited that they expect will be most transformative for K-12 schooling in the next 5 years are teacher workforce issues, AI, and the ongoing mental health crisis. I was surprised to see politics/culture wars and pandemic learning loss so low on the list given recent conversations with leaders!

transform_labels <- variables %>% 
  select(starts_with("transform"), -transform_other_text) %>% 
  pivot_longer(cols = starts_with("transform"),
               names_to = "factor",
               values_to = "response") %>% 
  select(factor) %>% 
  unique() %>% 
  mutate(label = case_when(
    factor == "transform_ai" ~ "Artificial intelligence",
    factor == "transform_mental_health" ~ "Mental health crisis",
    factor == "transform_pandemic" ~ "Pandemic learning loss",
    factor == "transform_workforce" ~ "Teacher workforce issues",
    factor == "transform_enrollment" ~ "Changes in school enrollment",
    factor == "transform_politics" ~ "Politics/culture wars",
    factor == "transform_esa" ~ "ESAs or voucher policies",
    factor == "transform_climate" ~ "Climate change",
    factor == "transform_other" ~ "Some other reason"
  ))
# #save plot
#   png(filename = "output/janette-images/figure_9.png",
#     width = 9,
#     height = 7,
#     units = "in",
#     res = 300)
#plot
variables %>% 
  select(school_id, future_resources, starts_with("transform"), -transform_other_text) %>% 
  pivot_longer(cols = starts_with("transform"),
               names_to = "factor",
               values_to = "response") %>% 
  group_by(factor) %>% 
  summarize(n = sum(response)) %>% 
  left_join(transform_labels, by = "factor") %>% 
  ggplot(., aes(reorder(label, n,), n)) +
  geom_col(fill = transcend_cols[1]) +
  theme(legend.position = "none", 
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(size = 13)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 150)) +
  scale_x_discrete(labels = wrap_format(25)) +
  geom_text(aes(label = n), nudge_y = 0.5, hjust = 0, color = transcend_na, fontface = "bold", size = 5.5, family = "sans") +
  coord_flip() +
  labs(x = "",
       y = "Number of schools",
       title = str_wrap("Factors Canopy school leaders think will be most transformative in the next 5 years", 55))

# dev.off()

15 school leaders chose to provide an open response to the question, citing some other factor that was not on our list.

# Pull open response answers
responses <- variables %>% 
  select(transform_other_text) %>% 
  filter(!is.na(transform_other_text))

# Create a text corpus
corpus <- Corpus(VectorSource(responses))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, c(stopwords("english"), custom_stopwords))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, "Dark2"))

datatable(responses)

Guiding question: Do the top factors school leaders think will be most transformative for schools in the next 5 years differ by school characteristics?

Yes - there were definitely differences in the characteristics more/less likely to select certain factors, but some of these should be interpreted with caution. Maybe only focus on the top 3 reasons (teacher workforce issues, AI, mental health crisis) which had larger sample sizes.

Note that I used the same logistic regression models as above. I am getting warning messages for both sets of models indicating that there is perfect separation resulting in predicted probabilities of 0 or 1. Should I apply some kind of penalty to reign in large coefficients or remove some of the predictors? Sample size is a definite issue with some of these factors that were not highly selected, which you can see easily with the large confidence intervals. Be careful with any interpretation of those findings.

Quick overview table as a reminder:

variables %>% 
  select(school_id, starts_with("transform"), -transform_other_text) %>% 
  pivot_longer(cols = starts_with("transform"),
               names_to = "factor",
               values_to = "response") %>% 
  group_by(factor) %>% 
  summarize(n = sum(response)) %>% 
  datatable()
ai <- log_model(variables$transform_ai)
plot_model("School characteristics predicting citing AI as a factor they predict will shape K-12 education in the next 5 years", ai)

ai <- tidy_model(ai) %>% 
  mutate(factor = "Artificial intelligence")

Really surprised to see PK schools here!

mental_health <- log_model(variables$transform_mental_health)
plot_model("School characteristics predicting citing the mental health crisis as a factor they predict will shape K-12 education in the next 5 years", mental_health)

mental_health <- tidy_model(mental_health) %>% 
  mutate(factor = "Mental health crisis")
pandemic <- log_model(variables$transform_pandemic)
plot_model("School characteristics predicting citing pandemic learning loss as a factor they predict will shape K-12 education in the next 5 years", pandemic)

pandemic <- tidy_model(pandemic) %>% 
  mutate(factor = "Pandemic learning loss")
workforce <- log_model(variables$transform_workforce)
plot_model("School characteristics predicting citing teacher workforce issues as a factor they predict will shape K-12 education in the next 5 years", workforce)

workforce <- tidy_model(workforce) %>% 
  mutate(factor = "Teacher workforce issues")
enrollment <- log_model(variables$transform_enrollment)
plot_model("School characteristics predicting citing changes in school enrollment as a factor they predict will shape K-12 education in the next 5 years", enrollment)

enrollment <- tidy_model(enrollment) %>% 
  mutate(factor = "Changes in school enrollment")

Not super shocked by this, but interesting to see that schools with higher proportions of leaders of color and teachers of color were much more likely to cite politics/culture wars as a factor.

politics <- log_model(variables$transform_politics)
plot_model("School characteristics predicting citing politics/culture wars as a factor they predict will shape K-12 education in the next 5 years", politics)

politics <- tidy_model(politics) %>% 
  mutate(factor = "Politics/culture wars")

Got a huge coefficient for private schools here–150x more likely to have chosen this factor! Only 22 schools selected this factor, and 14 of them were private schools.

esa <- log_model(variables$transform_esa)
plot_model("School characteristics predicting citing Education Savings Accounts and vouchers as a factor they predict will shape K-12 education in the next 5 years", esa)

esa <- tidy_model(esa) %>% 
  mutate(factor = "ESAs or voucher policies")
#quick check
variables %>% 
  select(school_type, transform_esa) %>% 
  group_by(school_type) %>% 
  summarize(n = sum(transform_esa))
## # A tibble: 3 × 2
##   school_type                      n
##   <chr>                        <int>
## 1 Independent (private) school    14
## 2 Public charter school            3
## 3 Public district school           5

Only 10 schools selected climate change as a factor, leading to really unstable modeling results.

climate <- log_model(variables$transform_climate)
plot_model("School characteristics predicting citing climate change as a factor they predict will shape K-12 education in the next 5 years", climate)

climate <- tidy_model(climate) %>% 
  mutate(factor = "Climate change")

Combined:

#combine tidy data
transformative_factors <- bind_rows(ai, mental_health, workforce) %>% 
  mutate(log_exp_est = log(exp_est),
         fill_color = ifelse(p.value <= 0.5, transcend_na,
                        ifelse(log_exp_est < 0, scales::gradient_n_pal(c(transcend_cols[3], "white"))((log_exp_est - min(log_exp_est)) / abs(min(log_exp_est))),
                               scales::gradient_n_pal(c("white", transcend_cols[1]))((log_exp_est - 0) / (max(log_exp_est) - 0))))) %>% 
  filter(fill_color != "#9D9FA2")
#rm(ai, mental_health, pandemic, workforce, enrollment, politics, esa)
# #save plot
#   png(filename = "output/janette-images/figure_10.png",
#     width = 9,
#     height = 7,
#     units = "in",
#     res = 300)
#plot
ggplot(transformative_factors, aes(factor, reorder(term, log_exp_est), fill = fill_color)) +
  geom_tile(width = 0.8) +
  #scale_fill_gradient(low = transcend_cols[4], high = transcend_cols[1]) +
  scale_fill_identity() +
  scale_x_discrete(expand = c(0,0)) +
  labs(title = str_wrap("School characteristics predicting future transformative factors", 50),
       subtitle = str_wrap("Blue indicates a characteristic that made a school more likely to select the factor as something that will be transformative in the next 5 years and red indicates a characteristic that made a school less likely to select that factor, with darker colors indicating a stronger relationship.", 75),
       x = "",
       y = "") +
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(size = 9)) +
  facet_wrap(~factor,
             scales = "free",
             strip.position = "bottom",
             labeller = label_wrap_gen(13))

# dev.off()

Future Concern by School Age

Guiding comment: What do you think about looking at leaders’ concern about sustainability by school age (as far as we can tell)? I wonder if we could replicate this analysis from 2023 to identify a group of more established vs. less established schools (using their 2024 data), and then test the likelihood that each group is “extremely” or “somewhat” concerned with sustainability. It could be powerful to either say that less established schools are more concerned, or that concern is pretty equally distributed…

# clean up environment
rm(ai, barrier_labs, climate, corpus, dictionary, donations, dtm, enrollment, esa, fiscal_cliff, inflation, local_funds, matrix, mental_health, mod_dat, mod1, pandemic, politics, private_funds, responses, staffing, transform_labels, transformative_factors, workforce, cov_labels, custom_stopwords, word_freqs, log_model, plot_model, tidy_model, data)

Defining established schools

Let’s begin by exploring how we can define more vs. less established schools.

Looks like 95 of 189 schools will be easy to categorize - they chose the same length of time for all of their core practices. The rest we will need to categorize differently.

For easy categories:
* new = all core tags reported in use for less than a year
* new = all core tags reported in use for 1-2 years
* med = all core tags reported in use for 3-4 years
* old = all core tags reported in use for 5+ years

#create dataset of variables and time - might not need
time_dat <- import(here("data/longitudinal", "full-tags-long.csv")) %>% 
  filter(year == 2024) %>% 
  select(school_id, var, time)
#pull times relative to schools, what time are they reporting?
school_time <- time_dat %>% 
  filter(time != 0) %>% 
  select(school_id, time) %>% 
  mutate(rate = 1) %>% 
  group_by(school_id, time) %>% 
  summarize(n = sum(rate)) %>% 
#assign a category for the cut-and-dry
  mutate(group = case_when(
    n == 5 & time == "5+ years" ~ "old",
    n == 5 & time == "3-4 years" ~ "med",
    n == 5 & time == "1-2 years" ~ "new",
    n == 5 & time == "Less than a year" ~ "new"
  )) %>% 
  ungroup() %>% 
  arrange(-n)
datatable(school_time)

We can easily extend these categories to any schools with most tags in a specific category. For example, if 4/5 of your core practices you’ve been practicing for at least 5 years, then we’ll consider you an established school. And if any of your core practices have been in place that long, then you’re also an established school.

This accounts for most of the additional schools, for a total N filled of 179 (out of 189).

school_time <- school_time %>% 
  group_by(school_id) %>% 
  mutate(group = case_when(
    any(time == "5+ years") ~ "old",
    n == 4 & time == "3-4 years" ~ "med",
    n == 4 & time == "1-2 years" ~ "new",
    n == 4 & time == "Less than a year" ~ "new",
    TRUE ~ as.character(group)
  )) %>% 
  ungroup() %>% 
  arrange(school_id, -n) 

What do the final 10 schools look like?

I set these manually:
* Schools 47, 746, 764 & 846 are “med” using similar logic as above that the timing for the oldest core tag will take precedence
* Schools 246 & 834 is “med” because they only chose 1 core tag and have been using it 3-4 years
* School 809 is “new” because they only chose 1 core tag and have been using it less than a year
* School 823 is “new” because they only chose 1 core tag and have been using it 1-2 years
* Schools 600 and 753 is “new” because their core tags have been in place 2 or fewer years (they weren’t picked up previously because they only selected 4 core tags, rather than 5)

#explore
school_time %>% 
  filter(is.na(group)) %>% 
  datatable()
#assign final manual values
school_time <- school_time %>% 
  mutate(group = case_when(
    school_id == 47 ~ "med",
    school_id == 746 ~ "med",
    school_id == 764 ~ "med",
    school_id == 846 ~ "med",
    school_id == 246 ~ "med",
    school_id == 834 ~ "med",
    school_id == 809 ~ "new",
    school_id == 823 ~ "new",
    school_id == 600 ~ "new",
    school_id == 753 ~ "new",
    TRUE ~ as.character(group)
  )) %>% 
  select(school_id, group) %>% 
  unique() %>% 
  mutate(group = factor(group, levels = c("old", "med", "new"), labels = c("Established", "Somewhat established", "New or newly-redesigned")))

Descriptives

Distribution of schools

school_time %>% 
  mutate(rate = 1) %>% 
  group_by(group) %>% 
  summarize(n = sum(rate),
            pct = round(n/189, 2)) %>% 
  ungroup() %>% 
  ggplot(., aes(group, pct)) +
  geom_col(fill = transcend_cols[1]) +
  scale_y_continuous(expand = c(0,0), 
                     limits = c(0, 1),
                     labels = scales::percent_format()) +
  theme(panel.grid.major.x = element_blank()) +
  labs(x = "", y = "Percent of schools", title = "Distribution of schools by how established they are") +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)),
            nudge_y = 0.01,
            vjust = 0,
            color = transcend_na,
            fontface = "bold",
            size = 5.5,
            family = "sans") +
  scale_x_discrete(labels = wrap_format(15))

Concern by school age:

variables %>% 
  left_join(school_time, by = "school_id") %>% 
  select(school_id, future_resources, group) %>% 
  mutate(concern = case_when(
    future_resources == "Yes, extremely" | future_resources == "Yes, somewhat" ~ "Concerned",
    future_resources == "No, not at all" | future_resources == "No, not very" ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>% 
  mutate(rate = 1) %>% 
  group_by(concern, group) %>% 
  summarize(n = sum(rate)) %>% 
  ungroup() %>% 
  group_by(group) %>% 
  mutate(sum = sum(n),
         pct = n/sum) %>% 
  ungroup() %>% 
  ggplot(., aes(group, pct, fill = concern)) + 
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)), 
            position = position_stack(vjust = .5),
            color = "white", 
            size = 5, 
            family = "sans") +
  theme(legend.position = "bottom", legend.direction = "horizontal", panel.grid.major.y = element_blank(), axis.text.y = element_text(size = 11)) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with future sustainability", fill = "Level of concern") +
  scale_x_discrete(labels = wrap_format(20)) +
  guides(fill = guide_legend(reverse = T)) +
  coord_flip()

Modeling

I reran the model above, adding how established schools were as a covariate in the model. We find that how established schools are, based on the definitions above, did not influence the likelihood that a school leader would indicate they were concerned with future sustainability. The estimates were extremely close to even likelihood and were not statistically significant.

This is not too surprising given the barchart above, which showed concern with future sustainability was very similar across schools of different ages. We also have a much smaller group of “new” schools (which is also not that surprising because we intentionally over-sampled repeat schools that were more likely to have been using practices for longer lengths of time) so this could look different had we had a much larger group of younger schools.

#prep data
mod_dat <- variables %>% 
  left_join(school_time, by = "school_id") %>% 
  select(school_id, future_resources, school_locale, school_type, grades_pk, grades_elementary, grades_middle, grades_high, school_enrollment, pct_bipoc, pct_ell, pct_frpl, pct_swd, leadership_diversity, group) %>% 
  mutate(future_resources = case_when(
    future_resources == "Yes, extremely" ~ 1,
    future_resources == "Yes, somewhat" ~ 1,
    TRUE ~ 0
  ),
  leadership_diversity = gsub("people", "leaders", leadership_diversity),
  school_locale = factor(school_locale, levels = c("Urban", "Suburban", "Rural", "Multiple")),
  school_type = factor(school_type, levels = c("Public district school", "Public charter school", "Independent (private) school")),
  leadership_diversity = factor(leadership_diversity, levels = c("0 - 24% leaders of color", "25 - 49% leaders of color", "50 - 74% leaders of color", "75 - 100% leaders of color")),
  school_enrollment = as.numeric(scale(school_enrollment, center = TRUE, scale = TRUE))) %>% 
  mutate(across(starts_with("pct"), ~as.numeric(scale(., center = TRUE, scale = TRUE))))
#model
mod <- glm(future_resources ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + leadership_diversity + group,
               family = "binomial",
               data = mod_dat)
# set labels
cov_labels <- c(
  "school_typeIndependent (private) school" = "Independent (private) school",
  "school_typePublic charter school" = "Public charter school",
  "grades_pk" = "PreK",
  "grades_elementary" = "Elementary",
  "grades_middle" = "Middle",
  "grades_high" = "High",
  "school_enrollment" = "School Enrollment",
  "pct_bipoc" = "% BIPOC students",
  "pct_ell" = "% EL-designated students",
  "pct_frpl" = "% FRPL-eligible",
  "pct_swd" = "% Students with disabilities",
  "school_localeMultiple" = "Multiple locales",
  "school_localeSuburban" = "Suburban",
  "school_localeRural" = "Rural",
  "leadership_diversity25 - 49% leaders of color" = "25-49% leaders of color",
  "leadership_diversity50 - 74% leaders of color" = "50-74% leaders of color",
  "leadership_diversity75 - 100% leaders of color" = "75-100% leaders of color",
  "groupSomewhat established" = "Somewhat established",
  "groupNew or newly-redesigned" = "New or newly redesigned"
)
# plot
tidy(mod, effects = "ran_pars", conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  mutate(exp_est = exp(estimate), 
         exp_min = exp(estimate - std.error), 
         exp_max = exp(estimate + std.error)) %>% 
  mutate(term = cov_labels[term]) %>% 
ggplot(., aes(y = fct_reorder(term, exp_est), x = exp_est)) +
  geom_linerange(aes(xmin = exp_min,
                     xmax = exp_max),
                 color = "blue") +
  geom_point() +
  geom_vline(xintercept = 1) +
  scale_x_continuous(
    trans = "log",
    breaks = c(.0625, .2, .5, 1, 2, 5, 16),
    labels = str_wrap(c("1/16 as likely", "1/5 as likely", "1/2 as likely", "Even", "2x as likely", "5x as likely", "16x as likely"), 10),
    expand = expansion(0, 0)
  ) +
  theme_transcend +
  theme(panel.grid.major.y = element_blank()) +
  labs(
    x = "Likelihood",
    y = "",
    title = str_wrap("School characteristics predicting higher concern with future sustainability", 60))